Setup

# Echo the source of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

## install required packages (only the ones that are not installed yet)
# Calling install.packages() unconditionally on every knit aborted with
# "Error in install.packages : Updating loaded packages". Only missing
# packages are installed now, and this happens before the class file is
# sourced (presumably that file attaches these packages -- TODO confirm).
required_pkgs <- c(
  "tsne", "pheatmap", "MASS", "cluster", "mclust", "flexmix", "lattice",
  "fpc", "RColorBrewer", "permute", "amap", "locfit", "vegan"
)
# system.file() returns "" for a package that is not installed and, unlike
# library()/requireNamespace(), does not load anything.
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "http://cran.us.r-project.org")
}

# Run the class definition file; the relative path is resolved against the
# current working directory.
source("RaceID2_StemID_class.R")

## input data
# NOTE(review): absolute, machine-specific path -- the report only knits on a
# machine that has this exact file; consider making it a parameter.
counts_file <- "C:/Users/Mike/Documents/WORK/Bioinformatics Project Internship/Scripts/seperate-scripts/lymphnode-sc-transcriptomics/data/3 - combinedcounts/LNS_W_ALL.csv"
x <- read.csv(counts_file, sep = ",", header = TRUE)

# Fail early if the id column is absent; `[[` matches the name exactly,
# whereas `$` on a data.frame silently accepts a partial match.
stopifnot("GENEID" %in% names(x))
rownames(x) <- x[["GENEID"]]

# prdata: data.frame with transcript counts for all genes (rows) in all cells
# (columns); with rownames == gene ids; remove ERCC spike-ins.
# The first column is dropped (assumes GENEID is column 1 -- TODO confirm);
# drop = FALSE keeps a data.frame even if only one count column remains.
is_spike_in <- grepl("ERCC", rownames(x))
prdata <- x[!is_spike_in, -1, drop = FALSE]

RaceID algorithm (produces a lot of unnecessary output, so it is hidden here)

RaceID Results

## Print the number of outliers and the number of cells
# Both counts are taken with length(): the first element of sc@out and
# sc@fdata respectively.
n_outliers <- length(sc@out[[1]])
n_cells <- length(sc@fdata)
print(sprintf("%d Outliers Identified from %d Cells", n_outliers, n_cells))
## [1] "250 Outliers Identified from 2303 Cells"
## diagnostic plots
# Every call below is kept at top level, in this order, so that its plot and
# any auto-printed return value land in the report where they did before.

# gap statistics (only if do.gap == TRUE) -- currently disabled
## plotgap(sc)

# within-cluster dispersion versus cluster number (only if sat == TRUE)
plotsaturation(sc, disp = TRUE)

# change of the within-cluster dispersion versus cluster number
# (only if sat == TRUE)
plotsaturation(sc)

# silhouette of k-medoids clusters -- disabled
### NOTE: DOES NOT WORK IN RSTUDIO: https://github.com/hemberg-lab/SC3/issues/45
# plotsilhouette(sc)

# Jaccard's similarity of k-medoids clusters
plotjaccard(sc)

# barchart of outlier probabilities
plotoutlierprobs(sc)

# regression of background model
plotbackground(sc)

# number of outliers as a function of the probability threshold (probthr)
plotsensitivity(sc)

# heatmap of the k-medoids clusters (final = FALSE)
clustheatmap(sc, final = FALSE, hmethod = "single")

##  [1]  2  1  9  8 11  3  7  5 12 10  4  6
# heatmap of the final clusters (final = TRUE)
clustheatmap(sc, final = TRUE, hmethod = "single")

##  [1] 44 41 47 39 37 23 42 26 36 18 48 49 40 33 21 25 28 35 17 30 38 27 31
## [24] 13 29 24  4 32 20 46  8 43 11 12 16  3  1  7 14  6 10 34  9 19 22  5
## [47] 15  2 45
# t-SNE map coloured by k-medoids cluster
plottsne(sc, final = FALSE)

# t-SNE map coloured by final cluster
plottsne(sc, final = TRUE)

# cell labels in the t-SNE map -- disabled
### NOTE: BAD IDEA FOR THOUSANDS OF CELLS!
# plotlabelstsne(sc, labels = sub("(\\_\\d+)", "", names(sc@ndata)))

# groups of cells shown as symbols in the t-SNE map; the group of a cell is
# its name in sc@ndata with the trailing "_<digits>" suffix removed
cell_types <- sub("(\\_\\d+)$", "", names(sc@ndata))
plotsymbolstsne(sc, types = cell_types)

RaceID Output

StemID Algorithm (produces a lot of unnecessary output, so it is hidden here)

StemID Results

## diagnostic plots
# Value passed as `scthr` to both lineage-tree plots below. It is defined
# once so the two trees cannot drift apart when it is changed.
### NOTE: SHOULD BE CHANGEABLE IN SNAKEMAKE
tree_scthr <- 0.3

# histogram of ratio between cell-to-cell distances in the embedded and the
# input space
plotdistanceratio(ltr)

# t-SNE map of the clusters with more than cthr cells including a minimum
# spanning tree for the cluster medoids
plotmap(ltr)

# visualization of the projections in t-SNE space overlaid with a minimum
# spanning tree connecting the cluster medoids
plotmapprojections(ltr)

# lineage tree showing the projections of all cells in t-SNE space
plottree(ltr, showCells = TRUE, nmode = FALSE, scthr = tree_scthr)

# lineage tree without showing the projections of all cells
plottree(ltr, showCells = FALSE, nmode = FALSE, scthr = tree_scthr)

# heatmap of the enrichment p-values for all inter-cluster links
plotlinkpv(ltr)

# heatmap of the link score for all inter-cluster links
plotlinkscore(ltr)

# heatmap showing the fold enrichment (or depletion) for significantly
# enriched or depleted links
projenrichment(ltr)

## Computing the StemID score
# NOTE(review): this assigns to `x` again, replacing whatever `x` held
# before; the name is kept in case later code reads the score from `x`.
x <- compscore(ltr, nn = 1)

## Plotting the StemID score
plotscore(ltr, 1)